library(tidyverse)
library(readxl)
library(openintro) #for state abbreviations
source('lib/fix-names.R')
# Standardize how numbers are formatted when exporting data; all data here is
# produced with these options.
options(scipen = 0, digits = 7)

## DEMOGRAPHIC CONTROLS ------------
LESO_path <- "raw-data/LESO-Controls"

# Stack a list of yearly city tables into one data frame, keep the demographic
# columns used below, and standardize their names and missing-value markers.
#   frames   - list of data frames, one per yearly file.
#   male_col - name of the male-population column in these files; it is
#              exposed as `popmales` in the result.
# Returns a data frame with columns Id2, state, city, year, popmales, pop1519,
# pop2024, pop2534, percblack (numeric) and Total_Population.
clean_city_demog <- function(frames, male_col) {
  frames %>%
    plyr::rbind.fill() %>%
    select(Id2, state, city, year, popmales = all_of(male_col),
           pop1519, pop2024, pop2534, percblack,
           Total_Population = `Estimate; SEX AND AGE - Total population`) %>%
    # "-" marks a missing value in the raw files. na_if() must be applied per
    # column: calling it on a whole data frame errors in dplyr >= 1.1.0.
    mutate(across(where(is.character), ~ na_if(.x, "-"))) %>%
    mutate(percblack = as.numeric(percblack))
}

city_files <- list.files(path = LESO_path,
                         pattern = "citypopdemographics20[0-9][0-9].csv")

# Column names changed in 2015, so that file is cleaned separately. Pick it by
# the year embedded in the file name instead of by position in the listing,
# which silently breaks if a file is added or missing.
city_years <- as.integer(
  sub(".*citypopdemographics(20[0-9][0-9]).*", "\\1", city_files)
)
year2015 <- city_files[city_years == 2015L]
pre2015 <- city_files[city_years < 2015L]
if (length(year2015) != 1L || length(pre2015) == 0L) {
  stop("Expected exactly one 2015 city demographics file and at least one ",
       "earlier file in ", LESO_path, call. = FALSE)
}

tempcitydemographics <- lapply(file.path(LESO_path, pre2015), read_csv)
demog <- clean_city_demog(tempcitydemographics, "popmales")

rm(tempcitydemographics)

# The 2015 file names the male-population column `malepop`.
tempcitydemographics <- lapply(file.path(LESO_path, year2015), read_csv)
demog2 <- clean_city_demog(tempcitydemographics, "malepop")

demog <- bind_rows(demog, demog2)
save(demog, file = "data/demog.RData")

# Demographic variables by county ---------
# Read every yearly county demographics file found under LESO_path.
tempcountydemographics <- list.files(
  path = LESO_path,
  pattern = "countypopdemographics20[0-9][0-9].csv"
)
cdemog <- file.path(LESO_path, tempcountydemographics) %>%
  lapply(read_csv)

# Stack the yearly tables and keep only the columns used below; the long
# census column name is shortened to `Population`.
countydemog <- plyr::rbind.fill(cdemog) %>%
  select(county, state, popmales, pop1519, pop2024, pop2534, percblack, year,
         Population = `Estimate; SEX AND AGE - Total population`)

rm(tempcountydemographics)

# Append the unit suffix to each county name: "Parish" for Louisiana, nothing
# for Alaska, "County" everywhere else.
is_louisiana <- countydemog$state == "Louisiana"
is_alaska <- countydemog$state == "Alaska"
countydemog$county <- ifelse(
  is_louisiana,
  paste(countydemog$county, "Parish"),
  ifelse(is_alaska,
         paste(countydemog$county),
         paste(countydemog$county, "County"))
)

# Fix some counties to merge with fips codes
alaska_rows <- which(is_alaska)
countydemog$county[alaska_rows] <-
  gsub(" County", "", countydemog$county[alaska_rows])
louisiana_rows <- which(is_louisiana)
countydemog$county[louisiana_rows] <-
  gsub("County", "Parish", countydemog$county[louisiana_rows])
countydemog$county <- fix_VA_counties(countydemog$county)

# Any New Mexico county whose name contains "Ana" (case-sensitive) is
# Dona Ana; give it one plain-ASCII spelling.
is_dona_ana <- grepl("Ana", countydemog$county) &
  countydemog$state %in% "New Mexico"
countydemog$county[is_dona_ana] <- "Dona Ana County"

# One-off renames, keyed by the name currently in the data. No replacement
# value is itself a key, so applying them in a single lookup is equivalent to
# applying them one after another.
county_renames <- c(
  "Oglala Lakota County" = "Shannon County", # changed name in 2015
  "Petersburg Borough" = "Petersburg Census Area",
  "Kusilvak Census Area" = "Wade Hampton Census Area", # name change
  "District of Columbia County" = "District of Columbia",
  "Baltimore city County" = "Baltimore city",
  "St. Louis city County" = "St. Louis city",
  "Carson City County" = "Carson City",
  "LaSalle Parish" = "La Salle Parish"
)
needs_rename <- countydemog$county %in% names(county_renames)
countydemog$county[needs_rename] <-
  unname(county_renames[countydemog$county[needs_rename]])

countydemog$state_abbrv <- state2abbr(countydemog$state)

# Load in fips codes ---------
# Read the county FIPS lookup and left-pad the codes with zeros so they are
# fixed-width strings. The directory is spelled "raw-data" to match LESO_path;
# the previous "raw-Data" spelling fails on case-sensitive filesystems.
# NOTE(review): county.fips is padded to width 2, but county FIPS codes are
# normally 3 digits. Confirm against countyfips.csv and downstream use before
# changing, since it alters the saved data.
county_fips <- read_csv("raw-data/countyfips.csv") %>%
  mutate(state.fips = str_pad(state.fips, width = 2, side = "left", pad = "0"),
         county.fips = str_pad(county.fips, width = 2, side = "left", pad = "0"))

# Keep every demographic row; attach FIPS codes where state abbreviation and
# county name both match the lookup.
countydemog.full <- left_join(
  countydemog,
  county_fips,
  by = c("state_abbrv" = "State", "county" = "County")
)

save(countydemog.full, file = "data/demog_county.RData")
